home *** CD-ROM | disk | FTP | other *** search
/ Chip 2004 July / CMCD0704.ISO / Software / Shareware / Comunicatii / jaeger / jaeger.exe / {app} / Tools / ToolGoogleAlerts.py < prev    next >
Text File  |  2004-04-03  |  13KB  |  441 lines

  1. #!/bin/python
  2.  
  3. """
  4. ToolGoogleAlerts.py
  5.  
  6. David Janes
  7. BlogMatrix
  8. 2004.03.30
  9. """
  10.  
  11. import sys
  12. import urllib
  13. import time
  14. import pprint
  15. import poplib
  16. import re
  17. import os
  18. import xml.sax.saxutils
  19.  
  20. import Tool
  21. import cStringIO
  22.  
  23. block_re = """([^\s][^\n]*)$(.*?)^<([^\n]*)>$"""
  24. block_rex = re.compile(block_re, re.I|re.MULTILINE|re.DOTALL)
  25.  
  26. remove_re = """^Remove this News Alert:(.*?)(http://www.google.com/newsalerts[^\s]*?)$"""
  27. remove_rex = re.compile(remove_re, re.I|re.MULTILINE|re.DOTALL)
  28.  
  29. confirm_re = """News Alerts.*Verification Email"""
  30. confirm_rex = re.compile(confirm_re, re.I|re.MULTILINE|re.DOTALL)
  31.  
  32. verify_re = """^Verify this News Alert request:(.*?)(http://www.google.com/newsalerts[^\s]*?)$"""
  33. verify_rex = re.compile(verify_re, re.I|re.MULTILINE|re.DOTALL)
  34.  
  35. class ToolGoogleAlerts(Tool.ToolInterface):
  36.     """
  37.     The interface for your tool. Simply create an instance of this object and Jaeger
  38.     will figure it out.
  39.     """
  40.     def __init__(self):
  41.         Tool.ToolInterface.__init__(self, self.WEBSERVER)
  42.  
  43.     def get_label(self, selected):
  44.         return    "Google News Alerts"
  45.  
  46.     def get_server(self, path):
  47.         """
  48.         """
  49.         return    {
  50.             "/": ( self.serve_root, "" ),
  51.             "/setup": ( self.serve_setup, "Setup Google News Alerts" ),
  52.             "/alerts/feed": ( self.serve_feed, "Feed" ),
  53.         }.get(path)
  54.  
  55.     def serve_root(self, operations, path, valuemap):
  56.         result = [
  57.             self.text_standard_header(path),
  58.             """
  59. <h2>Google News Alerts</h2>
  60. <p class="first">
  61. This extension lets you convert Google News Alerts arriving by e-mail
  62. into syndication feeds for Jäger.
  63.  
  64. <ul>
  65. <li>
  66. <a href="setup">Setup Google News Alerts</a> (do this once)
  67.  
  68. <li>
  69. Create new alerts <b><a target=_blank href="http://www.google.com/newsalerts?hl=en">using Google</a></b>.
  70.  
  71. </ul>
  72. """,
  73.             """<h3>Your Alerts</h3>
  74. <p class="first">
  75. Subscribe to Alerts by dragging the <img align=bottom src="/images/xml.gif"> button to Jäger's window.
  76. """,
  77. ]
  78.  
  79.         #
  80.         #    subscriptions
  81.         #
  82.         all_alerts = operations.get_userdata('alerts', None)
  83.         if not all_alerts:
  84.             all_alerts = {}
  85.             operations.set_userdata('alerts', all_alerts)
  86.  
  87.         blogs = operations.get_weblogs()
  88.         blog_urls = map(lambda b : b.get("url", ""), blogs)
  89.  
  90.         subscriptions = []
  91.         for subject, ( alert_list, alert_map ) in all_alerts.iteritems():
  92.             google_url = "http://news.google.com/news?hl=en&q=%s" % urllib.quote(subject)
  93.             feed_url = "alerts/feed?subject=%s" % urllib.quote(subject)
  94.             subscriptions.append(( subject, google_url, feed_url, alert_map.get("remove"), google_url in blog_urls ))
  95.  
  96.         subscriptions.sort()
  97.         for is_subscribed in [ True, False ]:
  98.             if is_subscribed: result.append("<h4>Alerts that are subscribed to</h4><ul>")
  99.             else: result.append("<h4>Alerts that are NOT subscribed to</h4><ul>")
  100.  
  101.             for subject, google_url, feed_url, cancel_url, subscribed in subscriptions:
  102.                 if subscribed != is_subscribed: continue
  103.                 result.append("""<li><a href=%s><img align=bottom border=0 src="/images/xml.gif"></a>  <a target=_blank href=%s>%s</a>  [<a target=_blank href=%s>Cancel</a>]""" % ( \
  104.                     self.quote_attribute(feed_url),
  105.                     self.quote_attribute(google_url),
  106.                     self.escape_html(subject),
  107.                     self.quote_attribute(cancel_url),
  108.                     ))
  109.             result.append("</ul>")
  110.  
  111.         result.append(self.text_standard_footer())
  112.  
  113.         return    200, "text/html", None, result
  114.  
  115.     def serve_setup(self, operations, path, valuemap):
  116.         is_form = valuemap.get("_form")
  117.  
  118.         hostname = valuemap.get("hostname")
  119.         if hostname == None:
  120.             hostname = operations.get_userdata("hostname", "")
  121.         else:
  122.             operations.set_userdata("hostname", hostname)
  123.  
  124.         account = valuemap.get("account")
  125.         if account == None:
  126.             account = operations.get_userdata("account", "")
  127.         else:
  128.             operations.set_userdata("account", account)
  129.  
  130.         password = valuemap.get("password")
  131.         if password == None:
  132.             password = operations.get_userdata("password", "")
  133.         else:
  134.             operations.set_userdata("password", password)
  135.  
  136.         enabled = valuemap.get("enabled", "")
  137.         if not is_form:
  138.             enabled = operations.get_userdata("enabled", "")
  139.         else:
  140.             operations.set_userdata("enabled", enabled)
  141.  
  142.         dodelete = valuemap.get("delete", "")
  143.         if not is_form:
  144.             dodelete = operations.get_userdata("delete", "")
  145.         else:
  146.             operations.set_userdata("delete", dodelete)
  147.  
  148.         autosubscribe = valuemap.get("autosubscribe", "")
  149.         if not is_form:
  150.             autosubscribe = operations.get_userdata("autosubscribe", "on")
  151.         else:
  152.             operations.set_userdata("autosubscribe", autosubscribe)
  153.  
  154.         result = [
  155.             self.text_standard_header(path),
  156.             """
  157. <h2>Setup Google News Alerts</h2>
  158. <p class="first">
  159. """,
  160.             """\
  161. <form method="POST">
  162. <h3>Your E-Mail Address</h3>
  163. <p class="first">
  164. This is the e-mail address that you are telling Google News to send alerts to.
  165.  
  166. <p>
  167. <table>
  168. <tr>
  169. <td align="left" width=120>E-Mail Address:</td>
  170. <td></td>
  171. <td><input type="text" name="account" value=\"""" + self.escape_html(account) + """"></td>
  172. </tr>
  173.  
  174. </table>
  175.  
  176. <h3>E-Mail Account Information</h3>
  177. <h4>POP3 Information</h4>
  178. <p class="first">
  179. If you receive your e-mail using a POP3 mail account,
  180. fill in the information here.
  181. We recommend that you create a seperate POP3 account for Google
  182. News Alerts, but if you can't (or don't know how to) do this, it's OK.
  183.  
  184. <p>
  185. <table>
  186. <tr>
  187. <td align="left" width=120>Hostname:</td>
  188. <td> </td>
  189. <td><input type="text" name="hostname" value=\"""" + self.escape_html(hostname) + """"></td>
  190. </tr>
  191.  
  192. <tr>
  193. <td align="left">Password:</td>
  194. <td></td>
  195. <td><input type="password" name="password" value=\"""" + self.escape_html(password) + """"></td>
  196. </tr>
  197.  
  198. </table>
  199.  
  200. <h4>Outlook Information</h4>
  201. <p class="first">
  202. If you receive your e-mail using Outlook (<b>not</b> Outlook Express),
  203. fill in the information here. <i>Coming soon</i>.
  204.  
  205. <h3>Options</h3>
  206. <p class="first">
  207. This extension will not run until you click the "Enable" button.
  208. If you select "Auto-subscribe",
  209. you will be prompted as soon as any new Google News Alert e-mails are discovered
  210. (i.e. you may be doing something else entirely!)
  211.  
  212. <p>
  213. <table>
  214. <tr>
  215. <td align="left" width=120>Enable:</td>
  216. <td></td>
  217. <td><input type="checkbox" name="enabled" """ + ( enabled and "checked" or "" ) + """></td>
  218. </tr>
  219.  
  220. <tr>
  221. <td align="left">Auto-subscribe:</td>
  222. <td></td>
  223. <td><input type="checkbox" name="autosubscribe" """ + ( autosubscribe and "checked" or "" ) + """></td>
  224. </tr>
  225.  
  226. <tr>
  227. <td></td>
  228. <td></td>
  229. <td><input type="Submit" name="submit" value="Save"></td>
  230. </tr>
  231. </table>
  232. """ ]
  233.  
  234.         result.append(self.text_standard_footer())
  235.         return    200, "text/html", None, result
  236.  
  237.     def serve_feed(self, operations, path, valuemap):
  238.         subject_alert = valuemap.get("subject", "")
  239.         if not subject_alert:
  240.             return    404, "text/html", self.text_filenotfound()
  241.  
  242.         print >> sys.stderr, "serve_feed: subject='%s'" % subject_alert
  243.  
  244.         all_alerts = operations.get_userdata('alerts', None)
  245.         if not all_alerts:
  246.             all_alerts = {}
  247.             operations.set_userdata('alerts', all_alerts)
  248.  
  249.         # pprint.pprint(all_alerts.keys())
  250.  
  251.         alert = all_alerts.get(subject_alert)
  252.         if not alert:
  253.             return    404, "text/html", self.text_filenotfound()
  254.  
  255.         alert_list, alert_map = alert
  256.  
  257.         #
  258.         #    output the RSS
  259.         #
  260.         result = []
  261.  
  262.         result.append('<?xml version="1.0" encoding="iso-8859-1"?>')
  263.         result.append('<rss version="0.92">')
  264.         result.append('<channel>')
  265.  
  266.         result.append("<title>Google News Alerts for %s</title>" % \
  267.             xml.sax.saxutils.escape(subject_alert))
  268.         result.append("<link>http://news.google.com/news?hl=en&q=%s</link>" % \
  269.             urllib.quote(subject_alert))
  270.  
  271.         for x in range(len(alert_list), 0, -1):
  272.             itime, iurl, ititle, imap = alert_list[x - 1]
  273.             ituple = time.localtime(itime)
  274.  
  275.             result.append(' <item>')
  276.             result.append('  <link>%s</link>' % xml.sax.saxutils.escape(iurl))
  277.             result.append('  <title>%s</title>' % xml.sax.saxutils.escape(ititle))
  278.             result.append('  <pubDate>%s</pubDate>' % time.strftime("%a, %d %b %Y %H:%M:%S GMT", ituple))
  279.             result.append(' </item>')
  280.  
  281.         result.append('</channel>')
  282.         result.append('</rss>')
  283.  
  284.         return    200, "application/xml", None, result
  285.  
  286.  
  287.     def pulse(self, operations):
  288.         hostname = operations.get_userdata("hostname", "")
  289.         if not hostname: return
  290.  
  291.         account = operations.get_userdata("account", "")
  292.         if not account: return
  293.  
  294.         password = operations.get_userdata("password", "")
  295.         if not password: return
  296.  
  297.         enabled = operations.get_userdata("enabled", "")
  298.         if not enabled: return
  299.  
  300.         interval_minutes = 10
  301.         try: interval_minutes = int(os.environ.get('JAEGER_ALERTS_INTERVAL', '10'))
  302.         except: pass
  303.  
  304.         last_attempt = operations.get_userdata("last_attempt", 0)
  305.         if last_attempt + interval_minutes * 60 > time.time():
  306.             return
  307.         operations.set_userdata("last_attempt", time.time())
  308.  
  309.         print >> sys.stderr, "ToolGoogleAlerts"
  310.  
  311.         connection = poplib.POP3(hostname)
  312.         connection.user(account)
  313.         connection.pass_(password)
  314.         numMessages = len(connection.list()[1])
  315.         for i in range(numMessages):
  316.             lines = []
  317.             for j in connection.retr(i+1)[1]:
  318.                 lines.append(j)
  319.  
  320.             self.process_message(operations, "\n".join(lines))
  321.             # self.process_message(os.linesep.join(lines))
  322.             # pprint.pprint(("ToolGoogleAlerts: *******", i, lines))
  323.  
  324.     def process_message(self, operations, message):
  325.         #
  326.         #    get the current alerts
  327.         #
  328.         all_alerts = operations.get_userdata('alerts', None)
  329.         if not all_alerts:
  330.             all_alerts = {}
  331.             operations.set_userdata('alerts', all_alerts)
  332.  
  333.         verified = operations.get_userdata('verified', None)
  334.         if not verified:
  335.             verified = {}
  336.             operations.set_userdata('verified', verified)
  337.  
  338.         try:
  339.             import types
  340.             import email
  341.             import email.Utils
  342.  
  343.             msg = email.message_from_string(message)
  344.  
  345.             frm = msg.get('From')
  346.             if frm != 'newsalerts-noreply@google.com': return
  347.  
  348.             date = msg.get('Date')
  349.             if not date: return
  350.  
  351.             pdate = email.Utils.parsedate(date)
  352.             if not pdate: return
  353.  
  354.             tdate = time.mktime(pdate)
  355.             if (time.time() - tdate) / 3600 > (24 * 3):
  356.                 return
  357.  
  358.             msgid = msg.get('Message-ID')
  359.  
  360.             payload = msg.get_payload()
  361.             if not type(payload) in types.StringTypes: return
  362.  
  363.             subject = msg.get('Subject')
  364.             # print >> sys.stderr, "VERIFY: A", subject
  365.             if confirm_rex.match(subject):
  366.                 #
  367.                 #    This code handles verification
  368.                 #
  369.                 # print >> sys.stderr, "VERIFY: A.1"
  370.                 match = verify_rex.search(payload)
  371.                 if match:
  372.                     # print >> sys.stderr, "VERIFY: B"
  373.                     url = match.group(2)
  374.                     if not verified.get(url):
  375.                         # print >> sys.stderr, "VERIFY: C"
  376.                         operations.log("verifying '%s'" % url)
  377.  
  378.                         try:
  379.                             f = urllib.urlopen(url)
  380.                             f.read()
  381.                             f.close()
  382.  
  383.                             verified[url] = 1
  384.                         except:
  385.                             print >> sys.stderr, "ToolGoogleAlerts.process_message: caught exception"
  386.                             traceback.print_exc(file = sys.stderr)
  387.  
  388.                 # print >> sys.stderr, "VERIFY: D"
  389.             elif subject[:20] == 'Google News Alert - ':
  390.                 subject_alert = subject[20:]
  391.  
  392.                 #
  393.                 #
  394.                 alert = all_alerts.get(subject_alert, ( [], {} ))
  395.                 # pprint.pprint(alert)
  396.                 alert_list, alert_map = alert
  397.  
  398.                 is_new_subject = not bool(alert_list)
  399.  
  400.                 ## make a list of all URLs that we know about
  401.                 known_urls = {}
  402.                 for item in alert_list:
  403.                     known_urls[item[1]] = 1
  404.  
  405.                 ## add new stuff
  406.                 for match in block_rex.finditer(payload):
  407.                     iurl = match.group(3)
  408.                     if known_urls.get(iurl):
  409.                         continue
  410.  
  411.                     ititle = match.group(1)
  412.                     icontents = match.group(2).strip()
  413.  
  414.                     alert_list.append(( tdate, iurl, ititle, {} ))
  415.                     known_urls[iurl] = 1
  416.  
  417.                     print "ToolGoogleAlerts:", iurl, ititle
  418.  
  419.                 ## discover the 'remove url' for unsubscribing
  420.                 match = remove_rex.search(payload)
  421.                 if match:
  422.                     alert_map["remove"] = match.group(2)
  423.  
  424.                 ## save updated values
  425.                 all_alerts[subject_alert] = ( alert_list, alert_map )
  426.  
  427.                 if is_new_subject and operations.get_userdata("enabled", "autosubscribe"):
  428.                     feed_url = "%salerts/feed?subject=%s" % \
  429.                         ( self.tool_root(full=True), urllib.quote(subject_alert) )
  430.                     operations.subscribe_to(feed_url)
  431.  
  432.                 # pprint.pprint(("ToolGoogleAlerts", alert ))
  433.         except:
  434.             print >> sys.stderr, "ToolGoogleAlerts.process_message: caught exception"
  435.             traceback.print_exc(file = sys.stderr)
  436.  
#
# Creating an instance is what registers the tool: as the class docstring
# says, Jaeger picks the instance up by itself, so the object is not kept.
#
ToolGoogleAlerts()
  441.